# -*- coding: utf-8 -*-

import scrapy
from test1.items import miitbaozha
from scrapyluke.processors import *
import re
import time
import datetime

class ChinahrSpider(scrapy.Spider):
    """Spider for the shenpi.miit.gov.cn common-query listing (categoryId=321).

    ``parse`` walks the paginated result list, posting one detail request per
    ``aId`` found on the page; ``parse_info`` turns each detail response into
    a ``miitbaozha`` item.
    """

    name = 'baozha'
    start_urls = ['http://shenpi.miit.gov.cn/commonquery/result?categoryId=321&pagenow=1']

    # (label substring, item field) pairs. They are checked in this order and
    # the first substring found in an entry's 'deffieldName' decides the field.
    _FIELD_LABELS = (
        ('进出口申请企业', 'for_the_import_and_export_enterprises'),
        ('审批 年度', 'the_annual_examination_and_approval'),
        ('审批 编号', 'approval_number'),
        ('合同编号', 'the_contract_no_'),
        ('贸易方式', 'trade_mode'),
        ('申请进口用途及理由', 'for_the_purpose_of_import_and_reason'),
        ('申请日期', 'date_of_application'),
        ('工信部 审核状态', 'the_ministry_of_state_audit'),
    )

    def parse(self, response):
        """Yield a detail request per ``aId`` on the page, then the next page.

        Pagination ends on the first page that contains no ``aId`` inputs, or
        when the current page number cannot be read from ``response.url``.
        """
        # Collect the aId of every company listed on this page (one query only).
        aid_values = response.xpath('//input[@name="aId"]/@value').extract()
        if not aid_values:
            return

        for aid in aid_values:
            # The detail endpoint is queried by POSTing the aid as form data.
            yield scrapy.FormRequest("http://shenpi.miit.gov.cn/commonquery/showDetail",
                                     formdata={'aid': str(aid)},
                                     callback=self.parse_info)

        # Pagination: read the current page number back out of the URL.
        page_match = re.search(r'oryId=321&pagenow=(\d+)', response.url)
        if page_match is None:
            # E.g. the request was redirected; without a page number there is
            # no way to compute the next page, so stop instead of crashing.
            self.log('No page number found in %s; pagination stopped' % response.url)
            return

        next_page = int(page_match.group(1)) + 1
        yield scrapy.Request('http://shenpi.miit.gov.cn/commonquery/result?categoryId=321&pagenow=%d' % next_page, callback=self.parse)

    def parse_info(self, response):
        """Build a ``miitbaozha`` item from one detail response.

        The body is evaluated into a sequence of dicts, each read through its
        'deffieldName' (label) and 'deffieldValue' (value) keys. Returns the
        populated item, or ``None`` when the response body is empty.
        """
        raw_body = response.body
        if not raw_body:
            return None

        record = miitbaozha()

        # SECURITY: eval() executes whatever the remote server sends back.
        # TODO(review): replace with a real parser (json.loads) once the
        # payload format and the resulting string types have been confirmed
        # against the label matching below.
        entries = list(eval(raw_body))

        # Every entry uses the same two keys, so the target field has to be
        # inferred from the label text itself.
        for entry in entries:
            label = entry['deffieldName']
            value = entry['deffieldValue']
            for marker, field_name in self._FIELD_LABELS:
                if marker in label:
                    record[field_name] = value
                    break

        record['insert_time'] = str(datetime.datetime.now())
        record['url'] = response.url
        return record
